* Set CBP_input to the folder that contains list1.xls and cbp12msa.txt.
global CBP_input ""

* Build the list of MSA codes from sheet "List 1" of the spreadsheet.
* No firstrow option is given, so columns carry their Excel letter names (A, E, ...).
import excel "$CBP_input/list1.xls", sheet("List 1") clear
keep if E == "Metropolitan Statistical Area"

* Convert column A to a numeric variable; force turns non-numeric cells into missing.
destring A, generate(msa) force
keep msa
drop if missing(msa)
duplicates drop

* Store the unique codes in a temporary file; it is merged onto the CBP data below.
tempfile temp1
save `temp1'

* Load the CBP MSA file and keep only rows whose NAICS string ends in "//".
import delimited using "$CBP_input/cbp12msa.txt", clear
keep if substr(naics, -2, 2) == "//"

* First four characters as a number; anything not fully numeric becomes missing and is dropped.
generate naics4 = substr(naics, 1, 4)
destring naics4, replace force
drop if missing(naics4)

* Keep codes 3000-3999, then retain only MSAs that appear in the saved MSA list.
keep if inrange(naics4, 3000, 3999)
merge m:1 msa using `temp1', keep(match) nogenerate

* Narrow the data to the single industry code 3361.
drop if naics != "3361//"

* Number of rows left (presumably one per MSA; uniqueness is not checked here).
count
* Author's note: there are 118 MSAs with at least one plant in 3361.

* Detailed distribution of the establishment count.
summarize est, detail

* Order by establishments, largest first, and show the top row.
* n* expands to every variable whose name starts with "n".
gsort -est
list msa emp n* if _n == 1

/*
_n=1 is msa 19820, which is Detroit-Warren-Dearborn

There are 3 establishments in n1000_3 and 3 in n1000_2 (the 6 largest plants).
The bottom 16 plants are in the following categories:
n1-4 6.    Max employment: 4x6 = 24
n10-19 1.  Max employment: 1x19 = 19
n20-49 6.  Max employment: 6*49 = 294
n50-99 1.  Max employment: 1 x 99 = 99
n250-499 1 Max employment: 1x 499 = 499
n1000_1 1  Max employment: 1x 1499 = 1499


The total maximum employment in the bottom 16 plants: 2434
The total MSA employment in the industry: 18828
The minimum percent employment in top 6 plants: (18828-2434)/18828 = 0.87

*/
